// Replication code for: Determinants of Refugee Naturalization in the United States.

// Note: The individual-level data used in this study cannot be shared or posted publicly and are
// controlled by the Department of Homeland Security (see SM for details).

// Load the refugee records (see paper and SM for details).
// The clear option on use already discards whatever data are in memory.
use "REFlprNATZv2.dta", clear

// Build the merge keys (state, city and year of arrival) under the names
// used as keys in the geo-level file, then attach the geo-level data.
generate state      = rstate
generate city_upper = rcity
generate year       = rarrivefy

merge m:1 city_upper state year using "tomerge_strict.dta"

// ---- Sample restrictions ----

// Keep only records that matched the place-of-arrival file and that carry
// a non-empty county name.
keep if _merge == 3 & county_name != ""

// Age: drop records with missing age and anyone younger than 13.
drop if rage == . | rage < 13

// Drop Cuban nationals.
drop if rnationality == "CUBA"

// Drop records whose gender is coded "U".
drop if rgender == "U"

// Drop arrivals after 2010. A missing arrival year also satisfies > 2010
// in Stata and is therefore dropped as well.
drop if rarrivefy > 2010

// Drop unaccompanied minors.
drop if uac == 1

// Drop records with no agency recorded.
drop if rra == ""

// ---- Recode variables ----

// City and education: replace empty strings with the placeholder "99".
foreach v of varlist rcity reducation {
    replace `v' = "99" if `v' == ""
}

// Drop records whose naturalization year is 1962, which is treated as invalid.
drop if ndatefy == 1962

// Top-10 nationalities: rank nationalities by their number of records
// (largest first) and pool everything outside the ten largest into "Other".
bysort rnationality: generate rnat_count = _N
generate rnat_countneg = -rnat_count
sort rnat_countneg rnationality, stable
egen rnat_rank1 = group(rnat_countneg rnationality)

generate rnat_top = rnationality
replace  rnat_top = "Other" if rnat_rank1 > 10

// Numeric version; "Other" takes code 99, the base level in the regressions (ib99.).
encode  rnat_top, generate(rnat_top_n)
replace rnat_top_n = 99 if rnat_top == "Other"


// Education categories. Every record starts as "None/Unknown"; recognised
// values of reducation are then mapped to their category. Values that match
// none of the lists (including the "99" placeholder) stay "None/Unknown".
generate reduccat = "None/Unknown"
replace  reduccat = "None/Unknown"        if inlist(reducation, "", "UNKNOWN", "NONE")
replace  reduccat = "Primary"             if inlist(reducation, "KINDERGARTEN", "PRIMARY", "INTERMEDIATE")
replace  reduccat = "Less than Secondary" if reducation == "SECONDARY"
replace  reduccat = "Secondary"           if inlist(reducation, "TECHNICAL SCHOOL", "PRE-UNIVERSITY", "PROFESSIONAL")
replace  reduccat = "University"          if inlist(reducation, "UNIVERSITY/COLLEGE", "GRADUATE SCHOOL")

// Numeric version; "None/Unknown" is the base level (99).
encode  reduccat, generate(reduccat_n)
replace reduccat_n = 99 if reduccat == "None/Unknown"

// Agency: numeric codes from rra, with "HIAS" recoded to 99 (base level).
encode  rra, generate(rra_n)
replace rra_n = 99 if rra == "HIAS"

// Free case indicator: 1 when rpool is "NO U.S. TIES CASES", 0 otherwise.
generate free = (rpool == "NO U.S. TIES CASES")

// Factor version: 1 for free cases, 99 (base level) for everyone else.
generate free_n = cond(free == 1, 1, 99)

// Relationship: the five main categories are kept as they are, while
// OTHER / EXSPOUSE / UNION are pooled into "OTHER". Any other value of
// rrelationship leaves relat empty.
generate relat = rrelationship if inlist(rrelationship, "CHILD", "PA", "SPOUSE", "PARENT", "SIBLING")
replace  relat = "OTHER"       if inlist(rrelationship, "OTHER", "EXSPOUSE", "UNION")

// Numeric version; "PA" is the base level (99).
encode  relat, generate(relat_n)
replace relat_n = 99 if relat == "PA"

// Gender: numeric version with "M" as the base level (99).
encode  rgender, generate(rgender_n)
replace rgender_n = 99 if rgender == "M"

// Age buckets; ages 13-20 are the base level (99). The bounds are integers,
// so an age falling strictly between two buckets would be left missing.
generate rage_n = 99 if inrange(rage, 13, 20)
replace  rage_n = 1  if inrange(rage, 21, 30)
replace  rage_n = 2  if inrange(rage, 31, 40)
replace  rage_n = 3  if inrange(rage, 41, 50)
replace  rage_n = 4  if inrange(rage, 51, 60)
replace  rage_n = 5  if rage >= 61 & rage != .

// Arrival-year bins; the most recent arrivals (2009-2010) are the base level (99).
// Arrival years before 2000 are not assigned to any bin.
generate rarrivefy_n = 99 if inlist(rarrivefy, 2009, 2010)
replace  rarrivefy_n = 1  if inlist(rarrivefy, 2007, 2008)
replace  rarrivefy_n = 2  if inlist(rarrivefy, 2005, 2006)
replace  rarrivefy_n = 3  if inlist(rarrivefy, 2003, 2004)
replace  rarrivefy_n = 4  if inlist(rarrivefy, 2000, 2001, 2002)

// Years elapsed between arrival and 2010.
generate rarrivefy_c = 2010 - rarrivefy

// County of arrival, stored as "county name, state" and encoded for use as
// fixed effects. Los Angeles County, CA is the base level and must carry
// code 99 because the regressions use ib99.county_n.
rename  county_name county
replace county = county + ", " + state
encode  county, generate(county_n)

// encode numbers the counties 1, 2, ..., K in alphabetical order. When there
// are 99 or more distinct counties, code 99 already belongs to some other
// county, and recoding Los Angeles to 99 would silently pool the two counties
// in the base level (and leave the wrong value label on 99). In that case the
// other county is first moved to an unused code, carrying its label along.
// When no county occupies code 99 this step does nothing.
local base_county "Los Angeles County, CA"
local clash : label (county_n) 99, strict
if `"`clash'"' != "" & `"`clash'"' != "`base_county'" {
    quietly summarize county_n, meanonly
    local spare = r(max) + 1
    replace county_n = `spare' if county_n == 99
    label define county_n `spare' `"`clash'"' 99 "`base_county'", modify
}
replace county_n = 99 if county == "`base_county'"

// Drop counties with fewer than 200 records in the current sample.
bysort county: generate county_count = _N
drop if county_count < 200

// State of arrival, encoded for the state fixed-effects specification.
// The per-state record count is summarized for inspection only.
// "PA" is the base level (99).
encode rstate, generate(rstate_n)
bysort rstate: generate rstate_count = _N
summarize rstate_count, detail
replace rstate_n = 99 if rstate == "PA"

// Case size: number of records in the current sample that share a case
// number. The factor version is top-coded at 4 and uses single-person cases
// as the base level (99).
// NOTE(review): casesize is counted after the sample restrictions above, so
// dropped case members (e.g. children under 13) are not counted - confirm
// this is intended.
bysort rcasenumber: generate casesize = _N
generate casesize_n = cond(casesize == 1, 99, min(casesize, 4))


// Match to LPR records: 0 when flag_lpr is missing or 0, 1 when it is 1, 2 or 3.
// Any other flag value leaves the indicator missing.
generate match1_r_l = 0 if flag_lpr == . | flag_lpr == 0
replace  match1_r_l = 1 if inlist(flag_lpr, 1, 2, 3)

// Match to naturalization records, coded the same way from flag_natz.
generate match1_r_n = 0 if flag_natz == . | flag_natz == 0
replace  match1_r_n = 1 if inlist(flag_natz, 1, 2, 3)

// ---- Descriptive statistics ----

// Completed years between arrival and naturalization: the difference in
// calendar months, less one month when the naturalization day-of-month falls
// before the arrival day-of-month, converted to whole years.
generate yearstonatz = floor(                                   ///
    (   ym(year(nnatdate),  month(nnatdate))                    ///
      - ym(year(rdtarrive), month(rdtarrive))                   ///
      - (day(nnatdate) < day(rdtarrive)) ) / 12 )
tabulate yearstonatz, missing

// 6-year naturalization indicator: starts as the overall indicator and is
// reset to 0 for naturalized records that took more than 6 years. Records
// with a missing yearstonatz keep their original value.
generate match16_r_n = match1_r_n
replace  match16_r_n = 0 if match1_r_n == 1 & yearstonatz > 6 & yearstonatz != .
tabulate match16_r_n

// Match rate to LPR records, overall and by arrival year.
tabulate match1_r_l
foreach outcome of varlist match1_r_l {
    tabstat `outcome', by(rarrivefy) statistics(mean n)
}

// Naturalization rate, overall.
tabulate match1_r_n

// By arrival year (Figure 1): full sample, then only records matched to LPR.
foreach outcome of varlist match1_r_n match16_r_n {
    tabstat `outcome', by(rarrivefy) statistics(mean n)
    tabstat `outcome' if match1_r_l == 1, by(rarrivefy) statistics(mean n)
}

// Export the mean and count of the naturalization indicator by arrival year to CSV.
foreach outcome of varlist match1_r_n {
    foreach byvar of varlist rarrivefy {
        estpost tabstat `outcome', by(`byvar')                  ///
            statistics(mean n) columns(statistics) listwise
        esttab using "mean_`outcome'_by_`byvar'.csv",           ///
            cells(mean count) nostar                            ///
            nonote nomtitle nonumber noobs replace wide
    }
}

// ---- Regressions ----

// Figure 2: individual-level predictors plus arrival-county and arrival-year-bin
// fixed effects. Standard errors are clustered by case number.
global x1 "ib99.relat_n ib99.rgender_n ib99.reduccat_n ib99.rnat_top_n ib99.free_n ib99.rra_n ib99.rage_n ib99.rarrivefy_n ib99.county_n ib99.casesize_n"
regress match1_r_n $x1, cluster(rcasenumber)
eststo m1
esttab m1 using "natz1all.csv", replace wide label plain se
eststo clear


// Figure 3: add county-level characteristics.

// Share of co-nationals, taken from the nationality-specific share variable
// that matches each record's top-10 nationality (no data for Bhutan).
// Each record has exactly one rnat_top value, so at most one line applies to it;
// all other records keep a missing coshare.
generate coshare = .
replace  coshare = nat_bosnia  if rnat_top == "BOSNIA AND HERZEGOVINA"
replace  coshare = nat_burma   if rnat_top == "BURMA"
replace  coshare = nat_iran    if rnat_top == "IRAN"
replace  coshare = nat_iraq    if rnat_top == "IRAQ"
replace  coshare = nat_liberia if rnat_top == "LIBERIA"
replace  coshare = nat_russia  if rnat_top == "RUSSIA"
replace  coshare = nat_somalia if rnat_top == "SOMALIA"
replace  coshare = nat_ukraine if rnat_top == "UKRAINE"
replace  coshare = nat_vietnam if rnat_top == "VIETNAM"

// Cut each county-level measure into 4 equal-sized groups (coded 0-3 by egen cut);
// the lowest group is recoded to 99 so that it serves as the base level.
foreach v of varlist unemp_rate pcturban_imp coshare {
    egen    gr`v' = cut(`v'), group(4)
    replace gr`v' = 99 if gr`v' == 0
}

// Fit the model: individual-level predictors (without county fixed effects)
// plus the binned county-level characteristics, clustered by case number.
global x1 "ib99.relat_n ib99.rgender_n ib99.reduccat_n ib99.rnat_top_n ib99.free_n ib99.rra_n ib99.rage_n ib99.rarrivefy_n ib99.casesize_n"
global x4 "ib99.grunemp_rate ib99.grpcturban_imp ib99.grcoshare"
regress match1_r_n $x1 $x4, cluster(rcasenumber)
eststo m1
esttab m1 using "natzgeoreg4.csv", replace wide label plain se
eststo clear


// Figure S.1: replicate Figure 2 using only refugees matched to LPR records.
// The global x1 is reset to the Figure 2 specification (with county fixed effects).
global x1 = "ib99.relat_n ib99.rgender_n ib99.reduccat_n ib99.rnat_top_n ib99.free_n ib99.rra_n ib99.rage_n ib99.rarrivefy_n ib99.county_n ib99.casesize_n"

// The if qualifier must come before the options comma; placing it after the
// options (", cl(...) , if ...") is not valid Stata syntax.
reg match1_r_n $x1 if match1_r_l==1 , cl(rcasenumber)
eststo m1
esttab m1 using "natz1matchlpr.csv" , replace wide label plain se
eststo clear


// Figure S.2: replicate Figure 2 using only single-person cases
// (casesize_n == 99). Uses the global x1 as most recently defined above.
// The if qualifier must come before the options comma; placing it after the
// options (", cl(...) , if ...") is not valid Stata syntax.
reg match1_r_n $x1 if casesize_n==99 , cl(rcasenumber)
eststo m1
esttab m1 using "natz1casesize1.csv" , replace wide label plain se
eststo clear

// Figure S.3: replicate Figure 2 with arrival-state fixed effects (rstate_n)
// in place of the arrival-county fixed effects. Clustered by case number.
global x1a "ib99.relat_n ib99.rgender_n ib99.reduccat_n ib99.rnat_top_n ib99.free_n ib99.rra_n ib99.rage_n ib99.rarrivefy_n ib99.rstate_n ib99.casesize_n"
regress match1_r_n $x1a, cluster(rcasenumber)
eststo m1
esttab m1 using "natz1allstate.csv", replace wide label plain se
eststo clear



